# Date- 5/3/23
# Script No. 4
# Name: ROBUST
# File target: 1) Robustness checks for representation gap 
#              2) Robustness checks for party extremity
#              3) Robustness checks for party polarization


library(foreign)
library(stargazer)
library(sandwich)
library(broom)
library(lmtest)
library(Ecdat)
library(estimatr)
library(jtools)
library(huxtable)
library(sjPlot)
library(sjmisc)
library(tidyr)
library(pequod)
library(fastDummies)
library(lme4)
library(lmerTest)
library(psycho)
library(plm)
library(lmtest)
library(multiwayvcov)

###################################################################################################################
######################################## (1) Representation #######################################################
###################################################################################################################
# Relative file names used below (input CSVs, stargazer outputs) resolve
# against this folder.
setwd("C:/Users/yaira/Desktop/ideological polarization/FINAL/Tables and Graphs")

# Read the CSES Integrated Module Dataset (IMD) export.
r.cses.imd <- read_csv(file = "cses_imd.csv")

# Partition the IMD on the module-4 indicator and keep the second partition.
# NOTE(review): presumably the indicator is coded 0/1 so that element 2 holds
# the module-4 respondents -- confirm against the codebook.
r.cses.imd.4 <- split.data.frame(x = r.cses.imd, f = r.cses.imd$IMD1008_MOD_4)
r.cses.4 <- r.cses.imd.4[[2L]]

## Creating new var that contains for every voter the perceived party placement  
## for the party he voted for.

## Party code each respondent voted for (module 4).
## Three vote variables are consulted in priority order -- IMD3002_PR_1, then
## IMD3002_LH_PL, then IMD3002_LH_DC -- and the first code below 9000000 is
## used; respondents with no such code keep NA.
## Vectorised: the earlier row-by-row `if` chain aborted on a missing (NA) code
## and on an empty data frame. Here a missing code simply counts as "no usable
## code in this variable".
pr.vote.4 <- r.cses.4$IMD3002_PR_1
pl.vote.4 <- r.cses.4$IMD3002_LH_PL
dc.vote.4 <- r.cses.4$IMD3002_LH_DC

pr.valid.4 <- !is.na(pr.vote.4) & pr.vote.4 < 9000000
pl.valid.4 <- !is.na(pl.vote.4) & pl.vote.4 < 9000000
dc.valid.4 <- !is.na(dc.vote.4) & dc.vote.4 < 9000000

# Fill from the lowest-priority source first so that each higher-priority
# source overwrites it where it has a usable code.
r.cses.4$voted.for <- NA
r.cses.4$voted.for[dc.valid.4] <- dc.vote.4[dc.valid.4]
r.cses.4$voted.for[pl.valid.4] <- pl.vote.4[pl.valid.4]
r.cses.4$voted.for[pr.valid.4] <- pr.vote.4[pr.valid.4]

summary(r.cses.4$voted.for)
describe(r.cses.4$voted.for)

# Dropping respondents with no usable vote code
r.cses.4.noNA <- r.cses.4 %>% drop_na(voted.for)

## Creating a variable for parties' left-right placement by those who voted for them
## Perceived left-right placement of the party each respondent voted for.
## The IMD stores up to nine parties per study in slots A..I: IMD5000_<slot>
## holds the party code and IMD5012_<slot> the respondent's placement of it.
r.cses.4.noNA$voted.for.score <- NA

# amendments: in germany voted for var (IMD3002_LH_PL)  refers to CDU as 2760001
# and party placement (IMD5012_A) as 2760002 (see IMD5000_A).
r.cses.4.noNA$IMD5000_A[which(r.cses.4.noNA$IMD1006_NAM=="Germany")] <- 2760001

# Vectorised first-match lookup. The first slot (A -> I) whose party code
# equals voted.for supplies the score, and later slots are ignored for that row
# even if the supplied score is itself NA. The earlier row-by-row `if` chain
# aborted on an NA party code and on an empty data frame.
slot.matched.4 <- rep(FALSE, nrow(r.cses.4.noNA))
for (slot in LETTERS[1:9]) {
  slot.party <- r.cses.4.noNA[[paste0("IMD5000_", slot)]]
  slot.score <- r.cses.4.noNA[[paste0("IMD5012_", slot)]]
  slot.hit <- !slot.matched.4 & !is.na(slot.party) &
    slot.party == r.cses.4.noNA$voted.for
  r.cses.4.noNA$voted.for.score[slot.hit] <- slot.score[slot.hit]
  slot.matched.4 <- slot.matched.4 | slot.hit
}

# Spot check: vote code next to slot A's party code, slot A's placement and the
# placement that was picked.
r.cses.4.noNA %>% select(voted.for, IMD5000_A, IMD5012_A, voted.for.score)

summary(object = r.cses.4.noNA$voted.for.score)
describe(r.cses.4.noNA$voted.for.score)

# Scores above 10 are recoded to NA, then every row with a missing score is
# dropped.
off.scale.score.4 <- which(r.cses.4.noNA$voted.for.score > 10)
r.cses.4.noNA$voted.for.score[off.scale.score.4] <- NA
r.cses.4.noNA <- drop_na(r.cses.4.noNA, voted.for.score)

## measure the gap between voter's self placement to voter's perceived party placement
# Containers filled by the per-party loop further down: raw self placements,
# raw party placements, their empirical CDFs, and the two gap measures.
voter.4 <- list()
Vcdf.4 <- list()
party.4 <- list()
Pcdf.4 <- list()
rep.gap.4 <- list()
pol.gap.4 <- list()

# Self placement (IMD3006): values above 10 are recoded to NA and those
# respondents are removed.
off.scale.self.4 <- which(r.cses.4.noNA$IMD3006 > 10)
r.cses.4.noNA$IMD3006[off.scale.self.4] <- NA
r.cses.4.noNA <- drop_na(r.cses.4.noNA, IMD3006)

# Keep a copy of the cleaned respondent file on disk.
write.csv(
  x = r.cses.4.noNA,
  file = "C:\\Users\\yaira\\Desktop\\ideological polarization\\paperWD\\Do and Data\\r.cses.4.noNA.csv"
)

# Loading Political Party Database
# Loading Political Party Database.
# Rows without a CSES party id (PCSESID) are dropped; PCSESID is the key used
# below to match parties to the respondents' voted.for codes.
r.ppdb <- read_csv("ppdb.csv")
r.ppdb.noNA <- r.ppdb %>% drop_na(PCSESID)

# For every party id: collect (a) the self placements and (b) the perceived
# party placements of the respondents who voted for that party, turn each into
# an empirical CDF evaluated at 0, 1, ..., 10, and summarise the difference
# between the two CDFs.
# NOTE(review): voter.4 / Vcdf.4 / party.4 / Pcdf.4 are indexed with the raw
# PCSESID value, while rep.gap.4 / pol.gap.4 are keyed by its character form.
# If PCSESID is numeric the first four are positional (and therefore very
# long, mostly NULL) lists -- later code reads them back the same way.
for (i in unique(r.ppdb.noNA$PCSESID)) {
  # CDF for voters self placement
  v4 <- as.matrix(na.omit(r.cses.4.noNA$IMD3006[which(r.cses.4.noNA$voted.for == i)]))
  voter.4[[i]] <- v4
  Vcdf.4[[i]] <- empirical_cdf(v4, ubounds=seq(0, 10, by=1.0))  
  # CDF for voters perceived party placement
  p4 <- as.matrix(na.omit(r.cses.4.noNA$voted.for.score[which(r.cses.4.noNA$voted.for == i)]))
  party.4[[i]] <- p4
  Pcdf.4[[i]] <- empirical_cdf(p4, ubounds=seq(0, 10, by=1.0))
  # measuring the gap between the CDFs
  # [[3]] takes the third element of the empirical_cdf() result --
  # presumably the CDF column; confirm against the empirical_cdf docs.
  # rep.gap: sum of absolute differences; pol.gap: signed sum of differences.
  rep.gap.4[[as.character(i)]] <- sum(abs(Pcdf.4[[i]][[3]] - Vcdf.4[[i]][[3]]))
  pol.gap.4[[as.character(i)]] <- sum(Pcdf.4[[i]][[3]] - Vcdf.4[[i]][[3]])
}

## adding the representation gap and polarization to the r.ppdb database
# rep
# Representation gap: flatten the per-party list into a two-column table keyed
# by party id, then attach it to the party data.
rep.values.4 <- unlist(rep.gap.4)
Rep.Gap.4 <- data.frame(
  PCSESID = names(rep.values.4),
  party.gap = rep.values.4,
  stringsAsFactors = FALSE
)
r.ppdb.noNA <- merge(x = r.ppdb.noNA, y = Rep.Gap.4, by = "PCSESID")

# Signed (polarization) gap: same treatment.
pol.values.4 <- unlist(pol.gap.4)
Pol.Gap.4 <- data.frame(
  PCSESID = names(pol.values.4),
  party.pol = pol.values.4,
  stringsAsFactors = FALSE
)
r.ppdb.noNA <- merge(x = r.ppdb.noNA, y = Pol.Gap.4, by = "PCSESID")

View(select(r.ppdb.noNA, COUNTRY, PTYNAME, party.gap, party.pol))

##### adding Chile, Hungary and Italy from r.cses.5 #####

r.cses.5 <- read_csv("cses5.csv")

## Party code each respondent voted for (module 5).
## E3013_LH_PL is used when its code is below 900000, otherwise E3013_LH_DC
## when that one is below 900000; respondents with neither keep NA.
## Vectorised: the earlier row-by-row `if` chain aborted on a missing (NA) code
## and on an empty data frame.
pl.vote.5 <- r.cses.5$E3013_LH_PL
dc.vote.5 <- r.cses.5$E3013_LH_DC

pl.valid.5 <- !is.na(pl.vote.5) & pl.vote.5 < 900000
dc.valid.5 <- !is.na(dc.vote.5) & dc.vote.5 < 900000

# Lower-priority source first; the higher-priority one overwrites it.
r.cses.5$voted.for <- NA
r.cses.5$voted.for[dc.valid.5] <- dc.vote.5[dc.valid.5]
r.cses.5$voted.for[pl.valid.5] <- pl.vote.5[pl.valid.5]

summary(r.cses.5$voted.for)
describe(r.cses.5$voted.for)

# Dropping respondents with no usable vote code
r.cses.5.noNA <- r.cses.5 %>% drop_na(voted.for)

## Perceived placement of the party each respondent voted for (module 5).
## Only the party codes listed here are mapped; each one points at the E3019
## slot that holds the respondent's placement of that party. Every other vote
## code keeps an NA score.
r.cses.5.noNA$voted.for.score <- NA

party.slot.5 <- data.frame(
  code = c(152001, 348001,
           152002, 348002, 380002,
           152003, 348003, 380003,
           152004, 348004, 380004,
           152005,
           380008),
  slot = c("A", "A",
           "B", "B", "B",
           "C", "C", "C",
           "D", "D", "D",
           "E",
           "H"),
  stringsAsFactors = FALSE
)

# Table-driven replacement for the earlier 13-branch row-by-row chain, which
# also failed on an empty data frame.
voted.slot.5 <- party.slot.5$slot[match(r.cses.5.noNA$voted.for, party.slot.5$code)]
for (slot in unique(party.slot.5$slot)) {
  slot.rows <- which(voted.slot.5 == slot)
  r.cses.5.noNA$voted.for.score[slot.rows] <-
    r.cses.5.noNA[[paste0("E3019_", slot)]][slot.rows]
}

# test: voters of party 348002 should carry their E3019_B value as score.
# A plain logical vector is used as the row index (the earlier form indexed
# rows with a one-column comparison result).
r.cses.5.noNA[r.cses.5.noNA$voted.for == 348002,
              c("voted.for", "E3019_B", "voted.for.score")]

summary(r.cses.5.noNA$voted.for.score)
describe(r.cses.5.noNA$voted.for.score)

# Scores above 10 are recoded to NA.
r.cses.5.noNA$voted.for.score[which(r.cses.5.noNA$voted.for.score > 10)] <- NA

## measure the gap between voter's self placement to voter's perceived party placement

# Containers filled by the module-5 per-party loop.
voter.5 <- list()
Vcdf.5 <- list()
party.5 <- list()
Pcdf.5 <- list()
rep.gap.5 <- list()
pol.gap.5 <- list()

# Self placement (E3020): values above 10 become NA and those respondents are
# removed.
off.scale.self.5 <- which(r.cses.5.noNA$E3020 > 10)
r.cses.5.noNA$E3020[off.scale.self.5] <- NA
r.cses.5.noNA <- drop_na(r.cses.5.noNA, E3020)

# The file is exported before rows with a missing party score are removed.
write.csv(
  x = r.cses.5.noNA,
  file = "C:\\Users\\yaira\\Desktop\\ideological polarization\\paperWD\\Do and Data\\r.cses.5.noNA.csv"
)

# Perceived party placement: values above 10 become NA, then rows with a
# missing score are dropped.
off.scale.score.5 <- which(r.cses.5.noNA$voted.for.score > 10)
r.cses.5.noNA$voted.for.score[off.scale.score.5] <- NA
r.cses.5.noNA <- drop_na(r.cses.5.noNA, voted.for.score)

# Module-5 version of the per-party comparison: for every party id, build the
# empirical CDF (evaluated at 0..10) of the voters' self placements (E3020)
# and of their perceived party placements, then summarise the difference.
# NOTE(review): voter.5 / Vcdf.5 / party.5 / Pcdf.5 are indexed with the raw
# PCSESID value, rep.gap.5 / pol.gap.5 with its character form.
for (i in unique(r.ppdb.noNA$PCSESID)) {
  # CDF for voters self placement
  v5 <- as.matrix(na.omit(r.cses.5.noNA$E3020[which(r.cses.5.noNA$voted.for == i)]))
  voter.5[[i]] <- v5
  Vcdf.5[[i]] <- empirical_cdf(v5, ubounds=seq(0, 10, by=1.0))  
  # CDF for voters perceived party placement
  p5 <- as.matrix(na.omit(r.cses.5.noNA$voted.for.score[which(r.cses.5.noNA$voted.for == i)]))
  party.5[[i]] <- p5
  Pcdf.5[[i]] <- empirical_cdf(p5, ubounds=seq(0, 10, by=1.0))
  # measuring the gap between the CDFs
  # [[3]] is the third element of the empirical_cdf() result -- presumably the
  # CDF column. rep.gap: sum of absolute differences; pol.gap: signed sum.
  rep.gap.5[[as.character(i)]] <- sum(abs(Pcdf.5[[i]][[3]]-Vcdf.5[[i]][[3]]))
  pol.gap.5[[as.character(i)]] <- sum(Pcdf.5[[i]][[3]]-Vcdf.5[[i]][[3]])
}

## adding the representation gap and polarization to the r.ppdb database

# Module-5 representation gap as a table keyed by party id, attached to the
# party data under its own column name (party.gap.5).
rep.values.5 <- unlist(rep.gap.5)
Rep.Gap.5 <- data.frame(
  PCSESID = names(rep.values.5),
  party.gap.5 = rep.values.5,
  stringsAsFactors = FALSE
)
r.ppdb.noNA <- merge(x = r.ppdb.noNA, y = Rep.Gap.5, by = "PCSESID")

# Module-5 signed gap, same treatment (party.pol.5).
pol.values.5 <- unlist(pol.gap.5)
Pol.Gap.5 <- data.frame(
  PCSESID = names(pol.values.5),
  party.pol.5 = pol.values.5,
  stringsAsFactors = FALSE
)
r.ppdb.noNA <- merge(x = r.ppdb.noNA, y = Pol.Gap.5, by = "PCSESID")

##### adding the Netherlands' parties from r.cses.3 #####

# Partition the IMD on the module-3 indicator and keep the second partition.
# NOTE(review): presumably element 2 is the "in module 3" group -- confirm.
r.cses.imd.3 <- split.data.frame(r.cses.imd, r.cses.imd$IMD1008_MOD_3)
r.cses.3 <- r.cses.imd.3[[2]]

## Party code each respondent voted for (module 3).
## Same rule as for module 4: IMD3002_PR_1, then IMD3002_LH_PL, then
## IMD3002_LH_DC; the first code below 9000000 is used, otherwise NA.
## Vectorised: the earlier row-by-row `if` chain aborted on a missing (NA) code
## and on an empty data frame.
pr.vote.3 <- r.cses.3$IMD3002_PR_1
pl.vote.3 <- r.cses.3$IMD3002_LH_PL
dc.vote.3 <- r.cses.3$IMD3002_LH_DC

pr.valid.3 <- !is.na(pr.vote.3) & pr.vote.3 < 9000000
pl.valid.3 <- !is.na(pl.vote.3) & pl.vote.3 < 9000000
dc.valid.3 <- !is.na(dc.vote.3) & dc.vote.3 < 9000000

# Lowest-priority source first; higher-priority sources overwrite it.
r.cses.3$voted.for <- NA
r.cses.3$voted.for[dc.valid.3] <- dc.vote.3[dc.valid.3]
r.cses.3$voted.for[pl.valid.3] <- pl.vote.3[pl.valid.3]
r.cses.3$voted.for[pr.valid.3] <- pr.vote.3[pr.valid.3]

summary(r.cses.3$voted.for)

r.cses.3.noNA <- r.cses.3 %>% drop_na(voted.for)

## Perceived left-right placement of the party each respondent voted for
## (module 3). IMD5000_<slot> holds the party code of slot A..I and
## IMD5012_<slot> the respondent's placement of that party.
r.cses.3.noNA$voted.for.score <- NA

# Vectorised first-match lookup over slots A..I.
# The earlier chain skipped slot F (it went from E straight to G), unlike the
# module-4 lookup, so voters of the party stored in slot F never received a
# score and were later dropped. Slot F is now included.
# NOTE(review): if slot F was left out on purpose, remove "F" from the loop.
slot.matched.3 <- rep(FALSE, nrow(r.cses.3.noNA))
for (slot in LETTERS[1:9]) {
  slot.party <- r.cses.3.noNA[[paste0("IMD5000_", slot)]]
  slot.score <- r.cses.3.noNA[[paste0("IMD5012_", slot)]]
  # Missing party codes never match; once a row has matched a slot, later
  # slots are ignored for it.
  slot.hit <- !slot.matched.3 & !is.na(slot.party) &
    slot.party == r.cses.3.noNA$voted.for
  r.cses.3.noNA$voted.for.score[slot.hit] <- slot.score[slot.hit]
  slot.matched.3 <- slot.matched.3 | slot.hit
}

# Spot check against slot B.
select(r.cses.3.noNA, voted.for, IMD5000_B, IMD5012_B, voted.for.score)

summary(r.cses.3.noNA$voted.for.score)
describe(r.cses.3.noNA$voted.for.score)

# measure the gap between voter's self placement to voter's perceived party placement

# Containers filled by the module-3 per-party loop.
voter.3 <- list()
Vcdf.3 <- list()
party.3 <- list()
Pcdf.3 <- list()
rep.gap.3 <- list()
pol.gap.3 <- list()

# Self placement (IMD3006): values above 10 become NA and those respondents
# are removed.
off.scale.self.3 <- which(r.cses.3.noNA$IMD3006 > 10)
r.cses.3.noNA$IMD3006[off.scale.self.3] <- NA
r.cses.3.noNA <- drop_na(r.cses.3.noNA, IMD3006)

# Perceived party placement: same rule.
off.scale.score.3 <- which(r.cses.3.noNA$voted.for.score > 10)
r.cses.3.noNA$voted.for.score[off.scale.score.3] <- NA
r.cses.3.noNA <- drop_na(r.cses.3.noNA, voted.for.score)

# Export the fully cleaned respondent file.
write.csv(
  x = r.cses.3.noNA,
  file = "C:\\Users\\yaira\\Desktop\\ideological polarization\\paperWD\\Do and Data\\r.cses.3.noNA.csv"
)

# Module-3 version of the per-party comparison: for every party id, build the
# empirical CDF (evaluated at 0..10) of the voters' self placements (IMD3006)
# and of their perceived party placements, then summarise the difference.
# NOTE(review): voter.3 / Vcdf.3 / party.3 / Pcdf.3 are indexed with the raw
# PCSESID value, rep.gap.3 / pol.gap.3 with its character form.
for (i in unique(r.ppdb.noNA$PCSESID)) {
  # CDF for voters self placement
  v3 <- as.matrix(na.omit(r.cses.3.noNA$IMD3006[which(r.cses.3.noNA$voted.for == i)]))
  voter.3[[i]] <- v3
  Vcdf.3[[i]] <- empirical_cdf(v3, ubounds=seq(0, 10, by=1.0))  
  # CDF for voters perceived party placement
  p3 <- as.matrix(na.omit(r.cses.3.noNA$voted.for.score[which(r.cses.3.noNA$voted.for == i)]))
  party.3[[i]] <- p3
  Pcdf.3[[i]] <- empirical_cdf(p3, ubounds=seq(0, 10, by=1.0))
  # measuring the gap between the CDFs
  # [[3]] is the third element of the empirical_cdf() result -- presumably the
  # CDF column. rep.gap: sum of absolute differences; pol.gap: signed sum.
  rep.gap.3[[as.character(i)]] <- sum(abs(Pcdf.3[[i]][[3]]-Vcdf.3[[i]][[3]]))
  pol.gap.3[[as.character(i)]] <- sum(Pcdf.3[[i]][[3]]-Vcdf.3[[i]][[3]])
}

# Module-3 representation gap as a table keyed by party id, attached to the
# party data as party.gap.3.
rep.values.3 <- unlist(rep.gap.3)
Rep.Gap.3 <- data.frame(
  PCSESID = names(rep.values.3),
  party.gap.3 = rep.values.3,
  stringsAsFactors = FALSE
)
r.ppdb.noNA <- merge(x = r.ppdb.noNA, y = Rep.Gap.3, by = "PCSESID")

# Module-3 signed gap, attached as party.pol.3.
pol.values.3 <- unlist(pol.gap.3)
Pol.Gap.3 <- data.frame(
  PCSESID = names(pol.values.3),
  party.pol.3 = pol.values.3,
  stringsAsFactors = FALSE
)
r.ppdb.noNA <- merge(x = r.ppdb.noNA, y = Pol.Gap.3, by = "PCSESID")

##### adding the ex-r.cses.4 data to the r.ppdb data frame #####

# Per-party summary columns:
#   count.V / count.P : number of self placements / party placements
#   mean.V  / mean.P  : their means
# Module 4 is the default source. Parties from the countries listed below take
# their gap measures, counts and means from module 3 or module 5 instead.
r.ppdb.noNA$count.V <- NA
r.ppdb.noNA$count.P <- NA
r.ppdb.noNA$mean.V <- NA
r.ppdb.noNA$mean.P <- NA

module.3.countries <- c("Netherlands")
module.5.countries <- c("Chile", "Hungary", "Italy")

for (i in seq_len(nrow(r.ppdb.noNA))) {
  party.id <- r.ppdb.noNA$PCSESID[i]
  party.country <- r.ppdb.noNA$COUNTRY[i]

  if (party.country %in% module.3.countries) {
    # The module-3 gaps were already merged in by PCSESID (party.gap.3 /
    # party.pol.3), so they are read from this row. The earlier code used the
    # row number of r.ppdb.noNA to index Rep.Gap.3 / Pol.Gap.3, which is only
    # right while both tables happen to share the same row order and have no
    # repeated party ids.
    r.ppdb.noNA$party.gap[i] <- r.ppdb.noNA$party.gap.3[i]
    r.ppdb.noNA$party.pol[i] <- r.ppdb.noNA$party.pol.3[i]
    self.placements <- voter.3[[party.id]]
    party.placements <- party.3[[party.id]]
  } else if (party.country %in% module.5.countries) {
    r.ppdb.noNA$party.gap[i] <- r.ppdb.noNA$party.gap.5[i]
    r.ppdb.noNA$party.pol[i] <- r.ppdb.noNA$party.pol.5[i]
    self.placements <- voter.5[[party.id]]
    party.placements <- party.5[[party.id]]
  } else {
    # Module 4: party.gap / party.pol already hold the module-4 values.
    self.placements <- voter.4[[party.id]]
    party.placements <- party.4[[party.id]]
  }

  # The placement lists hold one-column matrices, so nrow() is the number of
  # respondents and mean() averages all of them.
  r.ppdb.noNA$count.V[i] <- nrow(self.placements)
  r.ppdb.noNA$count.P[i] <- nrow(party.placements)
  r.ppdb.noNA$mean.V[i] <- mean(self.placements)
  r.ppdb.noNA$mean.P[i] <- mean(party.placements)
}

View(select(r.ppdb.noNA, COUNTRY, PCSESID, PTYNAME, party.gap, party.pol, activists, count.V, count.P))

# Parties with fewer than 14 party placements get no representation gap.
enough.placements <- which(r.ppdb.noNA$count.P >= 14)
too.few.placements <- which(r.ppdb.noNA$count.P < 14)
describe(r.ppdb.noNA$party.gap[enough.placements])
r.ppdb.noNA$party.gap[too.few.placements] <- NA

# How many parties lack a gap / an activists value?
summarise(r.ppdb.noNA, count = sum(is.na(party.gap)))
summarise(r.ppdb.noNA, count = sum(is.na(activists)))

summary(object = r.ppdb.noNA$activists)
describe(r.ppdb.noNA$activists)
freq(r.ppdb.noNA$activists)

# Analysis sample: parties with both a representation gap and an activists
# value.
r.ppdb.fin <- drop_na(r.ppdb.noNA, party.gap, activists)
view(select(r.ppdb.fin, COUNTRY, PTYNAME, PCSESID, party.gap, activists, count.V))

# CMP position: LEFTRIGHT divided by 20 and shifted by 5.
r.ppdb.fin$CMP <- r.ppdb.fin$LEFTRIGHT / 20 + 5

View(select(r.ppdb.fin, COUNTRY, PTYNAME, mean.P, ParlGov, CMP, party.gap, activists))

# scaling
# party.pol.fin equals party.pol, with the sign reversed for parties whose
# pfam code is above 2. The earlier row-by-row version wrote this as two
# branches (positive -> negated, negative -> abs()), which is the same sign
# flip, and it aborted on an NA in either column. Zeros are left untouched.
# NOTE(review): presumably pfam > 2 marks the party families on one side of
# the left-right scale -- confirm against the pfam coding.
r.ppdb.fin$party.pol.fin <- r.ppdb.fin$party.pol
pol.flip.rows <- which(r.ppdb.fin$pfam > 2 & r.ppdb.fin$party.pol != 0)
r.ppdb.fin$party.pol.fin[pol.flip.rows] <- -r.ppdb.fin$party.pol[pol.flip.rows]

# Number of parties whose rescaled value is negative.
sum(r.ppdb.fin$party.pol.fin<0)

## Robustness: without center parties (parties with mean.P in [5, 6) are excluded)
# Keep only parties whose perceived position (mean.P) is below 5 or at least 6.
r.ppdb.no5 <- r.ppdb.fin %>%
  filter(mean.P<5 | mean.P>=6)

# Inspection table: parties placed below 5 whose signed gap is negative.
# NOTE(review): party.pol.abs is not created anywhere in the visible code; it
# must already be a column of the party data for this select() to work.
a <- r.ppdb.no5 %>%
  dplyr::select(COUNTRY, PTYNAME, mean.P, mean.V, activists, party.gap, party.pol, party.pol.abs) %>%
  dplyr::filter(party.pol < 0 & mean.P < 5)

# Rescaling for this subset: start again from party.pol and reverse the sign
# for parties with mean.P >= 6. The earlier row-by-row version expressed the
# same flip as two branches and aborted on an NA; zeros are left untouched.
r.ppdb.no5$party.pol.fin <- r.ppdb.no5$party.pol
no5.flip.rows <- which(r.ppdb.no5$mean.P >= 6 & r.ppdb.no5$party.pol != 0)
r.ppdb.no5$party.pol.fin[no5.flip.rows] <- -r.ppdb.no5$party.pol[no5.flip.rows]

sum(r.ppdb.no5$party.pol.fin<0)

# Outlier rule: only parties with a representation gap below 3 stay in the
# analysis sample.
r.ppdb.fin <- filter(r.ppdb.fin, party.gap < 3)

# Distribution of the main predictor after the outlier rule.
summary(object = r.ppdb.fin$activists)
describe(r.ppdb.fin$activists)

# Distribution of the representation gap after the outlier rule.
summary(object = r.ppdb.fin$party.gap)
describe(r.ppdb.fin$party.gap)

# Smoothed scatter plots of activists against the two outcome measures.
scatter.smooth(r.ppdb.fin$activists, r.ppdb.fin$party.gap)
scatter.smooth(r.ppdb.fin$activists, r.ppdb.fin$party.pol.abs)

######################### models ########################

# 1) rep robustness 1: activists without weights, party position by voter - TABLE A5
# NOTE(review): both models are fitted on `ppdb.fin`, not on the `r.ppdb.fin`
# built above. `ppdb.fin` and the `activists2` column are not created in the
# visible code, so they must already exist (e.g. from an earlier script).

# Baseline: signed gap on activists2, mean.P and their interaction.
mod.rep1b <- lm(party.pol ~ activists2 * mean.P,
                data = ppdb.fin)

summary(mod.rep1b)

# Same model plus the controls GALLSQ, ENP, medleg and elffrn.
mod.rep2b <- lm(party.pol ~ activists2 * mean.P+
                  GALLSQ + ENP + medleg + elffrn,
                data = ppdb.fin)
summary(mod.rep2b)

# TABLE A5 in the appendix (HTML file written to the working directory)
stargazer(mod.rep1b, mod.rep2b,
          title="TABLE A5 - robustness 1: act withot wights, party position by voter", align=TRUE,
          dep.var.labels=c(""), 
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "table A5- representation robast2.htm")

## 2) rep robustness 2: activists with weights, party position by cses collaborators - TABLE A6
# NOTE(review): the heading says "with weights", yet both models use
# `activists2`, the variable the previous section labels "without weights".
# Confirm which activists variable is intended here.

# Baseline: signed gap on activists2, mean.P and their interaction.
r.mod.rep1b <- lm(party.pol ~ activists2*mean.P,
                  data = r.ppdb.fin)

summary(r.mod.rep1b)

# Same model plus the controls GALLSQ, ENP, medleg and elffrn.
r.mod.rep2b <- lm(party.pol ~ activists2*mean.P+
                    GALLSQ + ENP + medleg + elffrn,
                  data = r.ppdb.fin)
summary(r.mod.rep2b)

# Export the two models fitted above. The earlier call passed `r.mod.rep1` and
# `r.mod.rep2`, which are not the objects created in this section (they carry
# no `b` suffix), so the table either failed or showed other models.
# The table is rendered as plain text (type = 'text') into the file below.
stargazer(r.mod.rep1b, r.mod.rep2b,
          title="TABLE A6 - robustness 2: act with wights, party position by CSES", align=TRUE,
          dep.var.labels=c(""),
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'text', out = "representation reg2.htm")

## 2) The effect of activists on representation (party.pol), activists without weights
# NOTE: this overwrites mod.rep1b / mod.rep2b from the Table A5 section, now
# fitted on r.ppdb.fin.

# Baseline: signed gap on activists2, mean.P and their interaction.
mod.rep1b <- lm(party.pol ~ activists2*mean.P,
                data = r.ppdb.fin)

summary(mod.rep1b)

# Same model plus the controls elffrn, medleg, ENP and GALLSQ.
mod.rep2b <- lm(party.pol ~ activists2*mean.P+
                  elffrn + medleg + ENP + GALLSQ,
                data = r.ppdb.fin)
summary(mod.rep2b)



# Export as HTML. No `se` argument is passed, so the table uses the standard
# errors stored in the lm fits (no clustering is applied in this call).
stargazer(mod.rep1b, mod.rep2b,
          title="TABLE 3 - The Effect of Party Activists on ideological incongruence", align=TRUE,
          dep.var.labels=c(""), 
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "representation2 reg2.htm")


###################################################################################################################
############################################# (2) Party extremity  ################################################
###################################################################################################################

### party extremity = ((party position - country mean position)/5)^2

## 1) Finding the mean party position for each country

# Fill missing ParlGov and CMP party positions with the voter-perceived
# position (mean.P), after checking how well each source correlates with it.
# The fills are vectorised; the earlier versions looped over 1:nrow() with a
# scalar `is.na(..) == T` test, which breaks on an empty data frame.
summary(r.ppdb.fin$ParlGov)
corr.test(r.ppdb.fin$ParlGov, r.ppdb.fin$mean.P) # checking compatibility
parlgov.missing <- is.na(r.ppdb.fin$ParlGov)
r.ppdb.fin$ParlGov[parlgov.missing] <- r.ppdb.fin$mean.P[parlgov.missing]

# CMP position: LEFTRIGHT divided by 20 and shifted by 5; negative results are
# treated as missing before the fill.
r.ppdb.fin$CMP <- ((r.ppdb.fin$LEFTRIGHT/20)+5)
r.ppdb.fin$CMP[which(r.ppdb.fin$CMP < 0)] <- NA
summary(r.ppdb.fin$CMP)
corr.test(r.ppdb.fin$CMP, r.ppdb.fin$mean.P) # checking compatibility
cmp.missing <- is.na(r.ppdb.fin$CMP)
r.ppdb.fin$CMP[cmp.missing] <- r.ppdb.fin$mean.P[cmp.missing]

view(select(r.ppdb.fin, COUNTRY, PTYNAME, mean.P, ParlGov, CMP))

### Calculating the country mean by all 3 indicators   

# Country-level mean party position, computed separately for each of the three
# position measures and repeated on every party row of that country.

# Mean of `values` within each country, returned in row order. Rows whose
# country is NA get NA. Replaces three loops that recomputed the same country
# mean once per party row.
mean.by.country <- function(values, country) {
  key <- as.character(country)
  as.vector(tapply(values, key, mean)[key])
}

# mean by voter placement
r.ppdb.fin$mean.party.position <-
  mean.by.country(r.ppdb.fin$mean.P, r.ppdb.fin$COUNTRY)

# mean by parlgov
r.ppdb.fin$mean.party.position.PG <-
  mean.by.country(r.ppdb.fin$ParlGov, r.ppdb.fin$COUNTRY)

# mean by CMP
r.ppdb.fin$mean.party.position.CMP <-
  mean.by.country(r.ppdb.fin$CMP, r.ppdb.fin$COUNTRY)

# The voter-based mean is stored as `mean.party.position`. The earlier View()
# asked for `mean.party.position.V`, which the visible code never creates.
View(select(r.ppdb.fin, COUNTRY, mean.P, mean.party.position, ParlGov,
            mean.party.position.PG, CMP, mean.party.position.CMP))

### 2) Calculating party extremity
# Signed deviation of the party's perceived position from its country mean.
r.ppdb.fin$party.extremity <- r.ppdb.fin$mean.P - r.ppdb.fin$mean.party.position

## Dalton formula: ((party position - country mean position) / 5)^2
# by voters
r.ppdb.fin$party.extremity.V <-
  ((r.ppdb.fin$mean.P - r.ppdb.fin$mean.party.position) / 5)^2

# by ParlGov
r.ppdb.fin$party.extremity.PG <-
  abs((r.ppdb.fin$ParlGov - r.ppdb.fin$mean.party.position.PG) / 5)^2

# by cmp
r.ppdb.fin$party.extremity.CMP <-
  abs((r.ppdb.fin$CMP - r.ppdb.fin$mean.party.position.CMP) / 5)^2

## absolute deviation from the country mean
# by voters
r.ppdb.fin$party.extremity.V.abs <-
  abs(r.ppdb.fin$mean.P - r.ppdb.fin$mean.party.position)

# by ParlGov
r.ppdb.fin$party.extremity.PG.abs <-
  abs(r.ppdb.fin$ParlGov - r.ppdb.fin$mean.party.position.PG)

# by cmp
r.ppdb.fin$party.extremity.CMP.abs <-
  abs(r.ppdb.fin$CMP - r.ppdb.fin$mean.party.position.CMP)

View(select(r.ppdb.fin, COUNTRY, party.extremity.V, party.extremity.PG, party.extremity.CMP))

# NOTE(review): `party.extremity.n` is not created in the visible code, and a
# plain select() of a missing column stops the script. any_of() shows the
# column when it exists and skips it otherwise. Confirm which extremity
# variable was meant here.
View(select(r.ppdb.fin, any_of(c("COUNTRY", "party.extremity.n", "party.extremity.V.abs"))))

#####################################
#### Calculating voters extremity ###
#####################################

### voters extremity = (voters of party i mean position ) - (voters of country j mean position)

## Calculating voters' mean position per country
# Unweighted mean of the parties' mean.V values within each country, repeated
# on every party row of that country (NA where COUNTRY is NA). Computed as one
# grouped mean instead of once per party row.
voter.country.key <- as.character(r.ppdb.fin$COUNTRY)
r.ppdb.fin$mean.voter.position <- as.vector(
  tapply(r.ppdb.fin$mean.V, voter.country.key, mean)[voter.country.key]
)

# The voter-based country mean of party positions is stored as
# `mean.party.position`. The earlier View() asked for `mean.party.position.V`,
# which the visible code never creates.
View(select(r.ppdb.fin, COUNTRY, mean.party.position, mean.voter.position))

######## the base in from cses.4 ####### not to use #####
# NOTE(review): the banner above says "not to use", yet the columns built here
# feed mean.voter.position further down -- confirm the intent.
setwd("C:/Users/yaira/Desktop/ideological polarization/paperWD/Do and Data")

# Mean of `values` within each country, returned in row order (NA where the
# country is NA). Replaces loops that recomputed the same mean for every
# respondent row.
voter.mean.by.country <- function(values, country) {
  key <- as.character(country)
  as.vector(tapply(values, key, mean)[key])
}

## cses.4: default source
cses.4.noNA <- read.csv("cses.4.noNA.csv")
cses.4.noNA$mean.voter.position.4 <-
  voter.mean.by.country(cses.4.noNA$IMD3006, cses.4.noNA$IMD1006_NAM)
View(select(cses.4.noNA, IMD1006_NAM, mean.voter.position.4))

# importing from cses.4 to the r.ppdb df
# One row per country. The pairs are de-duplicated together: building the table
# from unique(country) and unique(mean) separately misaligns or fails as soon
# as two countries share the same mean.
df <- unique(cses.4.noNA[, c("IMD1006_NAM", "mean.voter.position.4")])
names(df) <- c("COUNTRY", "mean.voter.position.4")
rownames(df) <- NULL
view(df)
r.ppdb.fin <- left_join(r.ppdb.fin, df, by = "COUNTRY")
# Countries absent from this module get 0 so the three module columns can be
# added up later.
r.ppdb.fin$mean.voter.position.4[is.na(r.ppdb.fin$mean.voter.position.4)] <- 0
# The party data says "United Kingdom" where the CSES file says "Great Britain".
r.ppdb.fin$mean.voter.position.4[which(r.ppdb.fin$COUNTRY=="United Kingdom")] <-
  df$mean.voter.position.4[which(df$COUNTRY=="Great Britain")]
View(select(r.ppdb.fin, COUNTRY, mean.voter.position.4))

## cses.3 for Netherlands
cses.3.noNA <- read.csv("cses.3.noNA.csv")
cses.3.noNA$mean.voter.position.3 <-
  voter.mean.by.country(cses.3.noNA$IMD3006, cses.3.noNA$IMD1006_NAM)
View(select(cses.3.noNA, IMD1006_NAM, mean.voter.position.3))

# importing from cses.3 to the r.ppdb.fin (Netherlands only)
df <- unique(cses.3.noNA[, c("IMD1006_NAM", "mean.voter.position.3")])
names(df) <- c("COUNTRY", "mean.voter.position.3")
rownames(df) <- NULL
view(df)
df <- df[df$COUNTRY=="Netherlands",]
r.ppdb.fin <- merge(r.ppdb.fin, df, by = "COUNTRY", all.x = TRUE)
r.ppdb.fin$mean.voter.position.3[is.na(r.ppdb.fin$mean.voter.position.3)] <- 0
View(select(r.ppdb.fin, COUNTRY, mean.voter.position.3))

## cses.5 for Chile, Italy and Hungary
cses.5.noNA <- read.csv("cses.5.noNA.csv")
cses.5.noNA$mean.voter.position.5 <-
  voter.mean.by.country(cses.5.noNA$E3020, cses.5.noNA$E1006_NAM)
View(unique(select(cses.5.noNA, E1006_NAM, mean.voter.position.5)))

# importing from cses.5 to the r.ppdb.fin
df <- unique(cses.5.noNA[, c("E1006_NAM", "mean.voter.position.5")])
names(df) <- c("COUNTRY", "mean.voter.position.5")
rownames(df) <- NULL
view(df)
r.ppdb.fin <- merge(r.ppdb.fin, df, by = "COUNTRY", all.x = TRUE)
r.ppdb.fin$mean.voter.position.5[is.na(r.ppdb.fin$mean.voter.position.5)] <- 0

View(select(r.ppdb.fin, COUNTRY, mean.voter.position.5))

# Aggregating the three module-specific columns: each was filled with 0 where
# it had no value for a country, so their sum is the country's mean voter
# position from the modules that cover it.
r.ppdb.fin$mean.voter.position <- with(
  r.ppdb.fin,
  mean.voter.position.3 + mean.voter.position.4 + mean.voter.position.5
)

View(select(r.ppdb.fin, COUNTRY, mean.voter.position, mean.voter.position.4, mean.voter.position.5, mean.voter.position.3))


###### Calculating voters' extremity #####

## by Dalton: ((country mean voter position - party voters' mean) / 5)^2
# by voters
r.ppdb.fin$voter.extremity <-
  ((r.ppdb.fin$mean.voter.position - r.ppdb.fin$mean.V) / 5)^2
summary(r.ppdb.fin$voter.extremity)

## by abs: absolute distance between the same two quantities
# by voters
r.ppdb.fin$voter.extremity.abs <-
  abs(r.ppdb.fin$mean.voter.position - r.ppdb.fin$mean.V)

View(select(r.ppdb.fin, COUNTRY, PTYNAME, mean.voter.position, mean.P, mean.V, voter.extremity, voter.extremity.abs))

# NOTE(review): the earlier version of this View() selected
# `mean.party.position.V`, `party.extremity.n` and `voter.extremity.n`, none
# of which is created in the visible code; a plain select() of a missing
# column stops the script. The country mean is stored as
# `mean.party.position`; the two `.n` columns are requested through any_of()
# so they are shown when present and skipped otherwise.
View(select(r.ppdb.fin, any_of(c(
  "COUNTRY", "PTYNAME", "mean.party.position", "mean.P", "party.extremity.n",
  "mean.voter.position", "mean.V", "voter.extremity.n"
))))

# Checking for mixed political sides between voters and parties: TRUE where
# the party sits above its country's party mean while its voters sit below
# their country's voter mean.
r.ppdb.fin$mean.party.position < r.ppdb.fin$mean.P &
  r.ppdb.fin$mean.voter.position > r.ppdb.fin$mean.V


########################### robustness check 1 #####################################
# 2) activists without weights

# activists on party extremity without weights (table A3)

# NOTE(review): all five models are fitted on `ppdb.fin`, not on the
# `r.ppdb.fin` built in this script. `ppdb.fin`, `activists2`, `extrem.V` and
# `extrem.P` are not created in the visible code.

# (1) party extremity on activists2 only.
mod.ex1 <- lm(party.extremity ~ activists2,
              data = ppdb.fin)
summary(mod.ex1)

# (2) adds the controls GALLSQ, ENP, medleg and elffrn.
mod.ex2 <- lm(party.extremity ~ activists2 +
                GALLSQ + ENP + medleg + elffrn,
              data = ppdb.fin)
summary(mod.ex2)


# (3) controls plus extrem.V.
mod.ex3 <- lm(party.extremity ~ activists2 + 
                extrem.V +
                GALLSQ + ENP + medleg + elffrn,
              data = ppdb.fin)
summary(mod.ex3)

# (4) controls plus extrem.P.
mod.ex4 <- lm(party.extremity ~ activists2 + 
                extrem.P +
                GALLSQ + ENP + medleg + elffrn,
              data = ppdb.fin)
summary(mod.ex4)

# (5) activists2 interacted with extrem.P, plus controls.
mod.ex5 <- lm(party.extremity ~ activists2 * extrem.P +
                GALLSQ + ENP + medleg + elffrn,
              data = ppdb.fin)
summary(mod.ex5)


# Export all five models as one HTML table in the working directory.
stargazer(mod.ex1, mod.ex2, mod.ex3, mod.ex4, mod.ex5,  
          title= "Table A3 - The Effect of Party Activists on Party Extremity", align=TRUE,
          dep.var.labels=c("","Cluster"), 
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "table A3- party extremity.htm")

#
# Johnson-Neyman analysis of the activists2 x extrem.P interaction in mod.ex5.
# `pred` has to name a predictor that takes part in the model's interaction:
# mod.ex5 is fitted with `activists2`, so the earlier `pred = activists`
# referred to a variable the model does not contain.
library(interactions)
johnson_neyman(mod.ex5, pred = activists2, modx = extrem.P, alpha = .05)
#

########################### robustness check 2 #####################################
# 2) activists with party positions by CMP - TABLE A4

# Outcome in all five models: party.extremity.CMP, fitted on r.ppdb.fin.
# NOTE(review): `extrem.V` and `extrem.P` are not created in the visible code;
# they must already be columns of the party data.

# (1) CMP-based extremity on activists only.
mod.ex.r1b <- lm(party.extremity.CMP ~ activists,
                 data = r.ppdb.fin)
summary(mod.ex.r1b)

# (2) adds the controls GALLSQ, ENP, medleg and elffrn.
mod.ex.r2b <- lm(party.extremity.CMP ~ activists +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r2b)


# (3) controls plus extrem.V.
mod.ex.r3b <- lm(party.extremity.CMP ~ activists + 
                   extrem.V +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r3b)

# (4) controls plus extrem.P.
mod.ex.r4b <- lm(party.extremity.CMP ~ activists + 
                   extrem.P +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r4b)

# (5) activists interacted with extrem.P, plus controls.
# The formula has a doubled `+` across the line break (a trailing `+` followed
# by a leading `+`); it is left as written.
mod.ex.r5b <- lm(party.extremity.CMP ~ activists * extrem.P +
                   + GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r5b)

# Export all five models as one HTML table in the working directory.
stargazer(mod.ex.r1b, mod.ex.r2b, mod.ex.r3b, mod.ex.r4b, mod.ex.r5b,
          title="Table A4- The Effect of Party Activists on Party Extremity", align=TRUE,
          dep.var.labels=c("","Cluster"), 
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "Table A4- party extremity robust2.htm")

########################### robustness check 3 #####################################
# 3) activists with party positions by ParlGov
# Outcome in all five models: party.extremity.PG, fitted on r.ppdb.fin.
# (1) ParlGov-based extremity on activists only.
mod.ex.r1c <- lm(party.extremity.PG ~ activists,
                 data = r.ppdb.fin)
summary(mod.ex.r1c)

# (2) adds the controls GALLSQ, ENP, medleg and elffrn.
mod.ex.r2c <- lm(party.extremity.PG ~ activists +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r2c)


# (3) controls plus extrem.V.
mod.ex.r3c <- lm(party.extremity.PG ~ activists + 
                   extrem.V +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r3c)

# (4) controls plus extrem.P.
mod.ex.r4c <- lm(party.extremity.PG ~ activists + 
                   extrem.P +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r4c)


# (5) activists interacted with extrem.P, plus controls.
# The formula has a doubled `+` across the line break; it is left as written.
mod.ex.r5c <- lm(party.extremity.PG ~ activists * extrem.P +
                   + GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin)
summary(mod.ex.r5c)

# Export all five models; this table is rendered as plain text
# (type = 'text') into the file named in `out`.
stargazer(mod.ex.r1c, mod.ex.r2c, mod.ex.r3c, mod.ex.r4c, mod.ex.r5c,  
          title="The Effect of Party Activists on Party Extremity", align=TRUE,
          dep.var.labels=c("","Cluster"), 
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'text', out = "party extremity robust3.htm")


###################################################################################################################
############################################# (3) Party polarization  #############################################
###################################################################################################################

##################################
### country level polarization ###
##################################

# Country-level polarization index.
#
# For every country the index is
#   sqrt( sum_over_rows( weight * ((position - centre) / 5)^2 ) )
# and the country's value is written back to each of its rows.
#
# Arguments (all vectors with one element per row of the party-level data):
#   country  - country identifier used for grouping
#   weight   - weight of each row (vote share)
#   position - position of each row (party or voter mean)
#   centre   - reference position the deviation is taken from
# Returns a numeric vector of the same length as `country`. Rows whose country
# is NA stay NA; an NA anywhere in a country's inputs makes that country's
# index NA (no na.rm, as in the original loops).
#
# The divisor 5 presumably rescales deviations on a 0-10 left-right scale
# (Dalton's index) -- TODO confirm against the codebook.
country.polarization <- function(country, weight, position, centre) {
  # A misspelt or missing column arrives here as NULL; fail loudly instead of
  # silently producing an index of 0 for every country.
  stopifnot(length(weight) == length(country),
            length(position) == length(country),
            length(centre) == length(country))

  contribution <- weight * ((position - centre) / 5)^2
  index <- rep(NA_real_, length(country))

  # One pass per country (not one pass per row).
  for (cn in unique(country[!is.na(country)])) {
    rows <- which(country == cn)
    index[rows] <- sqrt(sum(contribution[rows]))
  }
  index
}

# parties (perceived positions: mean.P around mean.party.position)
r.ppdb.fin$party.polarization <- country.polarization(
  country  = r.ppdb.fin$COUNTRY,
  weight   = r.ppdb.fin$vote.share,
  position = r.ppdb.fin$mean.P,
  centre   = r.ppdb.fin$mean.party.position)
summary(r.ppdb.fin$party.polarization)
hist(r.ppdb.fin$party.polarization)

# parties (CMP positions: CMP around mean.party.position.CMP)
r.ppdb.fin$party.polarization.CMP <- country.polarization(
  country  = r.ppdb.fin$COUNTRY,
  weight   = r.ppdb.fin$vote.share,
  position = r.ppdb.fin$CMP,
  centre   = r.ppdb.fin$mean.party.position.CMP)
summary(r.ppdb.fin$party.polarization.CMP)
hist(r.ppdb.fin$party.polarization.CMP)

# voters (voter positions: mean.V around mean.voter.position)
r.ppdb.fin$voter.polarization <- country.polarization(
  country  = r.ppdb.fin$COUNTRY,
  weight   = r.ppdb.fin$vote.share,
  position = r.ppdb.fin$mean.V,
  centre   = r.ppdb.fin$mean.voter.position)
summary(r.ppdb.fin$voter.polarization)
hist(r.ppdb.fin$voter.polarization)

# activists at the country level
# Sum the two party-level activist measures within each country and attach the
# totals to every row of that country. No na.rm is used, so a single NA makes
# the country total NA.
r.ppdb.fin <- 
  r.ppdb.fin %>%
  dplyr::group_by(COUNTRY) %>%
  dplyr::mutate(coun.act = sum(activists),
                coun.act2 = sum(activists2)) %>%
  # drop the grouping again so later code does not operate on a grouped tibble
  dplyr::ungroup()

# Country-level data set: keep only the country-level columns and collapse the
# duplicated party rows to one row per distinct combination of values.
r.ppdb.fin.coun <- 
  r.ppdb.fin %>%
  dplyr::select(COUNTRY, party.polarization, party.polarization.CMP, voter.polarization, coun.act, coun.act2,
                medleg, ENP, GALLSQ, elffrn)
r.ppdb.fin.coun <- unique(r.ppdb.fin.coun)
## by Dalton, with weights
# Country-level OLS of party polarization on the country sum of activists
# (coun.act), first bivariate and then with the system-level controls.

mod.pol.1 <- lm(party.polarization ~ coun.act,
                data = r.ppdb.fin.coun)
summary(mod.pol.1)

mod.pol.2 <- lm(party.polarization ~ coun.act +
                  GALLSQ + ENP + medleg + elffrn,
                data = r.ppdb.fin.coun)
summary(mod.pol.2)


# covariate.labels are applied in formula order:
# coun.act, GALLSQ, ENP, medleg, elffrn.
# "Proportionality" was misspelt in the exported table label.
stargazer(mod.pol.1, mod.pol.2, 
          title="The Effect of Party Activists on Party Polarization", align=TRUE,
          dep.var.labels=c(""), covariate.labels=c("Activists' Relative Power", "Proportionality", "ENP", "MedD(lagged)", "Elffrn"),
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "polarization.htm")

############################# robust 1 ###################################
## activists without weights
# Same two specifications as the main polarization models, with coun.act2 as
# the activists measure. The models are fitted on r.ppdb.fin.coun, the
# country-level data set built in this script (it is the one that holds
# coun.act2); the previous `ppdb.fin.coun` is not defined in this script.

mod.pol.1r <- lm(party.polarization ~ coun.act2,
                 data = r.ppdb.fin.coun)
summary(mod.pol.1r)

mod.pol.2r <- lm(party.polarization ~ coun.act2 +
                   GALLSQ + ENP + medleg + elffrn,
                 data = r.ppdb.fin.coun)
summary(mod.pol.2r)


stargazer(mod.pol.1r, mod.pol.2r, 
          title="TABLE 7A. The Effect of Party Activists on Party Polarization", align=TRUE,
          dep.var.labels=c(""), 
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "Polarization robust1.htm")

############################# robust 2 ###################################
## activists measure coun.act, party polarization computed from CMP positions
# NOTE(review): the earlier header here also said "by Dalton, no weights",
# which contradicts the use of coun.act -- confirm the intended wording.

mod.pol.1rb <- lm(party.polarization.CMP ~ coun.act,
                  data = r.ppdb.fin.coun)
summary(mod.pol.1rb)

# Controls are listed in the same order as in the main model
# (GALLSQ, ENP, medleg, elffrn) so that they line up with covariate.labels
# below. With the previous order (medleg, ENP, GALLSQ, elffrn) the table
# printed medleg as "Proportionality" and GALLSQ as "MedD(lagged)".
# Reordering regressors does not change the OLS estimates.
mod.pol.2rb <- lm(party.polarization.CMP ~ coun.act +
                    GALLSQ + ENP + medleg + elffrn,
                  data = r.ppdb.fin.coun)
summary(mod.pol.2rb)


stargazer(mod.pol.1rb, mod.pol.2rb, 
          title="The Effect of Party Activists on Party Polarization", align=TRUE,
          dep.var.labels=c(""), covariate.labels=c("Activists' Relative Power", "Proportionality", "ENP", "MedD(lagged)", "Elffrn"),
          omit.stat=c("LL","ser","f"), no.space=TRUE, type = 'html', out = "polarization robust2.htm")
#####
# Party-level extremity models with COUNTRY entered as a regressor, for the
# ParlGov- and CMP-based outcomes.
mod.dis.PG <- lm(party.extremity.PG ~ activists  +
                   extrem.P + 
                   COUNTRY + medleg + ENP + GALLSQ + elffrn, 
                 data = r.ppdb.fin)
# Summarise the model just fitted (previously this summarised mod.pol.PG,
# which is only created further down the script).
summary(mod.dis.PG)

mod.dis.CMP <- lm(party.extremity.CMP ~ activists + voter.extremity +
                    extrem.P + 
                    COUNTRY + medleg + ENP + GALLSQ + elffrn,
                  data = r.ppdb.fin)
summary(mod.dis.CMP)
# mod.dis.V1 is expected to exist already (it is not created in this section).
stargazer(mod.dis.V1, mod.dis.PG, mod.dis.CMP, type = 'text')

# Breusch-Pagan test
# NOTE(review): mod.pol is not created in this section -- confirm it is the
# model that should be tested here.
bptest(mod.pol)

# cluster s.e.
# Each model is shown next to its coefficient test with standard errors
# clustered by country. sandwich::vcovCL takes the clustering variable as a
# formula and looks it up in the data each model was fitted on, so it stays
# aligned with the estimation sample. The previous call passed a misspelt
# `cluser =` argument to vcovHC, which does not cluster at all.
stargazer(mod.dis.V1, coeftest(mod.dis.V1, vcov. = vcovCL, cluster = ~COUNTRY),
  mod.dis.PG, coeftest(mod.dis.PG, vcov. = vcovCL, cluster = ~COUNTRY),
  mod.dis.CMP, coeftest(mod.dis.CMP, vcov. = vcovCL, cluster = ~COUNTRY),
  type = 'text')

## multilevel
# Random-intercept model: party.extremity.V on activists and the system-level
# controls, with an intercept varying by COUNTRY, fitted by maximum likelihood
# (REML switched off).
library(lme4)
library(merTools)
multi.mod.1 <- lmer(
  party.extremity.V ~ activists + (1 | COUNTRY)
  + medleg + ENP + GALLSQ + elffrn,
  data = r.ppdb.fin,
  REML = FALSE
)
summary(multi.mod.1)

# Intraclass correlation of the outcome across countries.
ICC(data = r.ppdb.fin, outcome = "party.extremity.V", group = "COUNTRY")


##########################
### party polarization ###
##########################


# Polarization outcomes regressed on party-level activists, extrem.P and the
# system-level controls, for the ParlGov- and CMP-based measures.
# NOTE(review): party.polarization.PG is not created in the visible part of
# this script -- confirm the column exists in r.ppdb.fin.
mod.pol.PG <- lm(party.polarization.PG ~ activists
                 + extrem.P 
                 + medleg + ENP + GALLSQ + elffrn, 
                 data = r.ppdb.fin)
summary(mod.pol.PG)

mod.pol.CMP <- lm(party.polarization.CMP ~ activists
                  + extrem.P 
                  + medleg + ENP + GALLSQ + elffrn, 
                  data = r.ppdb.fin)
summary(mod.pol.CMP)
# mod.pol.V1 is expected to exist already (it is not created in this section).
stargazer(mod.pol.V1, mod.pol.PG, mod.pol.CMP, type = 'text')

# Breusch-Pagan test
# NOTE(review): mod.pol is not created in this section -- confirm it is the
# model that should be tested here.
bptest(mod.pol)

# cluster s.e.
# Standard errors clustered by country via sandwich::vcovCL; the clustering
# variable is given as a formula and resolved in each model's own data. The
# previous call passed a misspelt `cluser =` argument to vcovHC, which does
# not cluster at all.
stargazer(
  mod.pol.V1, coeftest(mod.pol.V1, vcov. = vcovCL, cluster = ~COUNTRY),
  mod.pol.PG, coeftest(mod.pol.PG, vcov. = vcovCL, cluster = ~COUNTRY),
  mod.pol.CMP, coeftest(mod.pol.CMP, vcov. = vcovCL, cluster = ~COUNTRY),
  type = 'text')

## 2
# Robust-SE specification: country fixed effects with standard errors
# clustered on COUNTRY (estimatr::lm_robust).
mod.pol2 <- lm_robust(
  party.polarization.V ~ activists + extrem.P,
  data = r.ppdb.fin,
  fixed_effects = ~ COUNTRY,
  clusters = COUNTRY
)
summary(mod.pol2)

# Moderated regression (lmres) with the extrem x activists interaction
# plus the system-level controls.
mod.pol.3 <- lmres(
  party.polarization.V ~ extrem*activists +
    medleg + ENP + GALLSQ + elffrn,
  data = r.ppdb.fin
)
summary(mod.pol.3)

